In this R Markdown document we will display and document each step of CS329e R Project .
Project Requirements http://www.cs.utexas.edu/~cannata/dataVis/Projects/R%20Project%203/R%20Project%20Requirements.html.
In the source folder 01 Data, the R script Access Oracle Database.R was created to connect to Dr. Cannata’s Orcale database and generate a data frame from the uploaded Vocabulary.csv file. The RCurl and jsonlite package were loaded to allow general HTTP requests and process the results returned by the Web server.
Describe joined datasets.
source("../01 Data/Access Oracle Database.R", echo = TRUE)
##
## > require("RCurl")
## Loading required package: RCurl
## Loading required package: bitops
##
## > require("jsonlite")
## Loading required package: jsonlite
##
## Attaching package: 'jsonlite'
##
## The following object is masked from 'package:utils':
##
## View
##
## > require("dplyr")
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## > require("ggplot2")
## Loading required package: ggplot2
##
## > quake <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT r1.LATITUDE, r1.LONGITUDE, r1.PLACE, r2.LATITUDE, r2. .... [TRUNCATED]
##
## > quake %>% ggplot(aes(x = LATITUDE, y = LONGITUDE,
## + color = PLACE)) + geom_point()
A second R script subset.R was created to display the vocab data frame that was previously created in the last step. The head(), tail(), tbl_df(), glimpse(), and summary() functions were used to return subsets of this data frame.
source("../01 Data/Subset Data.R", echo = TRUE)
##
## > require("dplyr")
##
## > head(quake)
## LATITUDE LONGITUDE PLACE LATITUDE.1 LONGITUDE.1
## 1 37.1418 -117.2618 51km WNW of Beatty, Nevada 35.5500 -96.7640
## 2 37.1418 -117.2618 51km WNW of Beatty, Nevada 35.5310 -96.7880
## 3 41.8797 -119.6241 65km ESE of Lakeview, Oregon 34.7740 -97.5960
## 4 41.8797 -119.6241 65km ESE of Lakeview, Oregon 35.6810 -97.0980
## 5 41.8797 -119.6241 65km ESE of Lakeview, Oregon 36.1309 -97.6291
## 6 41.8797 -119.6241 65km ESE of Lakeview, Oregon 35.8913 -97.2752
## PLACE.1 MAG MAG.1
## 1 Oklahoma 4.8 4.8
## 2 Oklahoma 4.8 4.8
## 3 Oklahoma 4.2 4.2
## 4 8km ENE of Luther, Oklahoma 4.2 4.2
## 5 20km N of Crescent, Oklahoma 4.2 4.2
## 6 6km SSW of Langston, Oklahoma 4.2 4.2
##
## > tail(quake)
## LATITUDE LONGITUDE PLACE LATITUDE.1 LONGITUDE.1
## 8370 39.399 -121.601 Northern California 35.6073 -97.3863
## 8371 40.418 -120.578 Northern California 35.5414 -96.7537
## 8372 40.394 -120.518 Northern California 33.0300 -100.7660
## 8373 40.394 -120.518 Northern California 35.1920 -97.3200
## 8374 40.394 -120.518 Northern California 35.6860 -97.0890
## 8375 40.394 -120.518 Northern California 35.7727 -97.4677
## PLACE.1 MAG MAG.1
## 8370 9km ESE of Edmond, Oklahoma 4.5 4.5
## 8371 Oklahoma 4.7 4.7
## 8372 western Texas 4.4 4.4
## 8373 Oklahoma 4.4 4.4
## 8374 12km ENE of Luther, Oklahoma 4.4 4.4
## 8375 12km SSW of Guthrie, Oklahoma 4.4 4.4
##
## > tbl_df(quake)
## Source: local data frame [8,375 x 8]
##
## LATITUDE LONGITUDE PLACE LATITUDE.1 LONGITUDE.1
## 1 37.1418 -117.2618 51km WNW of Beatty, Nevada 35.5500 -96.7640
## 2 37.1418 -117.2618 51km WNW of Beatty, Nevada 35.5310 -96.7880
## 3 41.8797 -119.6241 65km ESE of Lakeview, Oregon 34.7740 -97.5960
## 4 41.8797 -119.6241 65km ESE of Lakeview, Oregon 35.6810 -97.0980
## 5 41.8797 -119.6241 65km ESE of Lakeview, Oregon 36.1309 -97.6291
## 6 41.8797 -119.6241 65km ESE of Lakeview, Oregon 35.8913 -97.2752
## 7 41.8797 -119.6241 65km ESE of Lakeview, Oregon 35.9466 -96.7594
## 8 41.8797 -119.6241 65km ESE of Lakeview, Oregon 36.8479 -97.7016
## 9 41.8797 -119.6241 65km ESE of Lakeview, Oregon 36.2618 -97.2643
## 10 41.8797 -119.6241 65km ESE of Lakeview, Oregon 36.8147 -98.2915
## .. ... ... ... ... ...
## Variables not shown: PLACE.1 (fctr), MAG (dbl), MAG.1 (dbl)
##
## > glimpse(quake)
## Observations: 8375
## Variables:
## $ LATITUDE (dbl) 37.1418, 37.1418, 41.8797, 41.8797, 41.8797, 41.87...
## $ LONGITUDE (dbl) -117.2618, -117.2618, -119.6241, -119.6241, -119.6...
## $ PLACE (fctr) 51km WNW of Beatty, Nevada, 51km WNW of Beatty, N...
## $ LATITUDE.1 (dbl) 35.5500, 35.5310, 34.7740, 35.6810, 36.1309, 35.89...
## $ LONGITUDE.1 (dbl) -96.7640, -96.7880, -97.5960, -97.0980, -97.6291, ...
## $ PLACE.1 (fctr) Oklahoma, Oklahoma, Oklahoma, 8km ENE of Luther, ...
## $ MAG (dbl) 4.8, 4.8, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, ...
## $ MAG.1 (dbl) 4.8, 4.8, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, 4.2, ...
##
## > summary(quake)
## LATITUDE LONGITUDE
## Min. :32.73 Min. :-124.5
## 1st Qu.:34.37 1st Qu.:-120.1
## Median :36.82 Median :-118.5
## Mean :36.48 Mean :-118.7
## 3rd Qu.:37.64 3rd Qu.:-116.7
## Max. :41.94 Max. :-114.7
##
## PLACE LATITUDE.1
## Central California :2225 Min. :32.85
## Southern California :2203 1st Qu.:35.59
## Nevada :1321 Median :35.95
## Northern California :1077 Mean :35.96
## Greater Los Angeles area, California: 538 3rd Qu.:36.63
## Long Valley area, California : 342 Max. :39.16
## (Other) : 669
## LONGITUDE.1 PLACE.1 MAG
## Min. :-100.77 Oklahoma :1746 Min. :4.00
## 1st Qu.: -97.71 western Texas : 588 1st Qu.:4.00
## Median : -97.48 4km S of Cushing, Oklahoma : 498 Median :4.10
## Mean : -97.73 6km SSW of Langston, Oklahoma: 459 Mean :4.14
## 3rd Qu.: -97.10 12km SSW of Guthrie, Oklahoma: 420 3rd Qu.:4.20
## Max. : -95.88 19km S of Medford, Oklahoma : 300 Max. :5.60
## (Other) :4364
## MAG.1
## Min. :4.00
## 1st Qu.:4.00
## Median :4.10
## Mean :4.14
## 3rd Qu.:4.20
## Max. :5.60
##
##
## > str(quake)
## 'data.frame': 8375 obs. of 8 variables:
## $ LATITUDE : num 37.1 37.1 41.9 41.9 41.9 ...
## $ LONGITUDE : num -117 -117 -120 -120 -120 ...
## $ PLACE : Factor w/ 27 levels "10km S of Rancho Palos Verdes, California",..: 7 7 11 11 11 11 11 11 11 11 ...
## $ LATITUDE.1 : num 35.5 35.5 34.8 35.7 36.1 ...
## $ LONGITUDE.1: num -96.8 -96.8 -97.6 -97.1 -97.6 ...
## $ PLACE.1 : Factor w/ 26 levels "12km ENE of Luther, Oklahoma",..: 25 25 25 17 6 15 12 13 10 19 ...
## $ MAG : num 4.8 4.8 4.2 4.2 4.2 ...
## $ MAG.1 : num 4.8 4.8 4.2 4.2 4.2 ...
In this data wrangling section, several work flows are constructed using the %>% pipe operator which revealed new information about the data set.
source("../02 Data Wrangling/Joining Data.R", echo = TRUE)
##
## > require("RCurl")
##
## > require("jsonlite")
##
## > require("dplyr")
##
## > require("tidyr")
## Loading required package: tidyr
##
## > require("ggplot2")
##
## > Above4cali <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT * FROM ABOVE4CALI\""),
## + httpheader = c(DB .... [TRUNCATED]
##
## > Above4OU <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT * FROM ABOVE4OU\""),
## + httpheader = c(DB = "j ..." ... [TRUNCATED]
##
## > Recentca <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT * FROM RECENTCA\""),
## + httpheader = c(DB = "j ..." ... [TRUNCATED]
##
## > Recentou <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT * FROM RECENTOU\""),
## + httpheader = c(DB = "j ..." ... [TRUNCATED]
##
## > names(Above4OU)
## [1] "TIME" "LATITUDE" "LONGITUDE" "DEPTH" "MAG"
## [6] "MAGTYPE" "NST" "GAP" "DMIN" "RMS"
## [11] "NET" "ID" "UPDATED" "PLACE" "TYPE"
##
## > names(Above4cali)
## [1] "TIME" "LATITUDE" "LONGITUDE" "DEPTH" "MAG"
## [6] "MAGTYPE" "NST" "GAP" "DMIN" "RMS"
## [11] "NET" "ID" "UPDATED" "PLACE" "TYPE"
##
## > names(Recentca)
## [1] "TIME" "LATITUDE" "LONGITUDE" "DEPTH" "MAG"
## [6] "MAGTYPE" "NST" "GAP" "DMIN" "RMS"
## [11] "NET" "ID" "UPDATED" "PLACE" "TYPE"
##
## > names(Recentou)
## [1] "TIME" "LATITUDE" "LONGITUDE" "DEPTH" "MAG"
## [6] "MAGTYPE" "NST" "GAP" "DMIN" "RMS"
## [11] "NET" "ID" "UPDATED" "PLACE" "TYPE"
##
## > left_join(Above4cali, Above4OU, by = "MAG") %>% ggplot(aes(x = LATITUDE.x,
## + y = LONGITUDE.x, color = PLACE.x)) + geom_point()
##
## > right_join(Above4cali, Above4OU, by = "DEPTH") %>%
## + ggplot(aes(x = LATITUDE.x, y = LONGITUDE.x, color = PLACE.x)) +
## + geom_point()
##
## > inner_join(Above4cali, Above4OU, by = "MAG") %>% ggplot(aes(x = LATITUDE.x,
## + y = LONGITUDE.x, color = PLACE.x)) + geom_point()
##
## > full_join(Above4cali, Above4OU, by = "DEPTH") %>%
## + ggplot(aes(x = LATITUDE.x, y = LONGITUDE.x, color = PLACE.x)) +
## + geom_point()
##
## > semi_join(Above4cali, Above4OU, by = "MAG") %>% ggplot(aes(x = LATITUDE,
## + y = LONGITUDE, color = PLACE)) + geom_point()
##
## > anti_join(Above4cali, Above4OU, by = "DEPTH") %>%
## + ggplot(aes(x = LATITUDE, y = LONGITUDE, color = PLACE)) +
## + geom_point()
##
## > bind_rows(Above4cali, Above4OU) %>% ggplot(aes(x = LATITUDE,
## + y = LONGITUDE, color = PLACE)) + geom_point()
Here are our Beautiful Graphs. The first graph shows the typical range and medium of magnitudes per state. Outliers are also shown. The second graph shows the number of earthquakes above magnitude 4 that have occurred since 1950 in the selected state. This exemplifies just how many powerful earthquakes have occurred in California compared to the other states.
source("../02 Data Wrangling/Data Wrangling.R", echo = TRUE)
##
## > require("dplyr")
##
## > require("tidyr")
##
## > require("jsonlite")
##
## > names(Above4OU)
## [1] "TIME" "LATITUDE" "LONGITUDE" "DEPTH" "MAG"
## [6] "MAGTYPE" "NST" "GAP" "DMIN" "RMS"
## [11] "NET" "ID" "UPDATED" "PLACE" "TYPE"
##
## > names(Above4cali)
## [1] "TIME" "LATITUDE" "LONGITUDE" "DEPTH" "MAG"
## [6] "MAGTYPE" "NST" "GAP" "DMIN" "RMS"
## [11] "NET" "ID" "UPDATED" "PLACE" "TYPE"
##
## > names(Recentca)
## [1] "TIME" "LATITUDE" "LONGITUDE" "DEPTH" "MAG"
## [6] "MAGTYPE" "NST" "GAP" "DMIN" "RMS"
## [11] "NET" "ID" "UPDATED" "PLACE" "TYPE"
##
## > names(Recentou)
## [1] "TIME" "LATITUDE" "LONGITUDE" "DEPTH" "MAG"
## [6] "MAGTYPE" "NST" "GAP" "DMIN" "RMS"
## [11] "NET" "ID" "UPDATED" "PLACE" "TYPE"
##
## > justCA <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT MAG,PLACE FROM CLEANEDRECENTCAL \""),
## + httphea .... [TRUNCATED]
##
## > justOU <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT MAG, PLACE FROM CLEANEDRECENTOU \""),
## + httphea .... [TRUNCATED]
##
## > combinedAll <- bind_rows(justCA, justOU)
##
## > p2 <- combinedAll %>% ggplot(aes(x = PLACE, y = MAG,
## + color = PLACE)) + geom_boxplot()
##
## > p2 <- p2 + theme(legend.position = "none") + labs(x = "State",
## + y = "Magnitude") + theme(plot.title = element_text(size = 20,
## + face = "b ..." ... [TRUNCATED]
##
## > p2
##
## > above4CA <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT MAG, PLACE FROM CLEANABOVE4CALI \""),
## + httph .... [TRUNCATED]
##
## > above4OU <- data.frame(fromJSON(getURL(URLencode("129.152.144.84:5001/rest/native/?query=\"SELECT MAG, PLACE FROM CLEANEDABOVE4OU \""),
## + httph .... [TRUNCATED]
##
## > p3 <- above4CA %>% ggplot(aes(x = PLACE, color = PLACE)) +
## + geom_bar(stat = "bin")
##
## > p3 <- p3 + scale_y_continuous(limits = c(0, 1250)) +
## + theme(legend.position = "none") + labs(x = "State", y = "Number of Level 4 and Above Ear ..." ... [TRUNCATED]
##
## > p3 <- p3 + theme(plot.title = element_text(size = 20,
## + face = "bold"))
##
## > p4 <- above4OU %>% ggplot(aes(x = PLACE, color = PLACE),
## + show_guide = FALSE) + geom_bar(stat = "bin")
##
## > p4 <- p4 + scale_y_continuous(limits = c(0, 1250)) +
## + theme(legend.position = "none") + labs(x = "State", y = "Number of Level 4 and Above Ear ..." ... [TRUNCATED]
##
## > require(grid)
## Loading required package: grid
##
## > p2
##
## > pushViewport(viewport(layout = grid.layout(1, 2)))
##
## > print(p3, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
##
## > print(p4, vp = viewport(layout.pos.row = 1, layout.pos.col = 2))
13.Include in your html file a PNG image that shows a characterization of the categorical columns (as group by plots) and measure columns (as histograms)
#source("../03 Visualizations/PNG .R", echo = TRUE)